library(foreign)
library(readstata13)
library(dplyr)
library(survey)
library(Hmisc)


# Work from the replication folder; both input files below are resolved
# relative to this directory.
setwd("C:/Users/kevin/Dropbox/Social_Media_Lab/Data_Survey/YouGov_Data/apsr_replication/")

# load() restores the objects stored in the .RData file. The script relies on
# one of them being called `data`; it is kept under the name `uk` from here on.
load(file = "merged_nr_soma.RData")
uk <- data

# Stata file read with readstata13 and kept as `bes`.
bes <- read.dta13(
  file = "BES2015_W6_v3.6.dta",
  generate.factors = FALSE
)


### Restrict both data sets to Twitter users and to rows usable for raking.
## Raking cannot handle NAs in the raking variables, so incomplete rows are
## dropped here. The categories themselves are aligned further down.

# uk: Twitter users with non-missing woman, household income and education.
uk <- filter(
  uk,
  twitter_user == 1,
  !is.na(woman),
  !is.na(profile_gross_household),
  !is.na(profile_education_age)
)

# bes: education is turned into numeric codes. The conversion is element-wise,
# so doing it before the row filters gives the same values as doing it between
# them.
bes$profile_education_age <- as.numeric(bes$profile_education_age)

# bes: Twitter users older than 17 with usable education and income answers.
#  - education code 7 is removed (the original comment calls these "the NAs")
#  - income codes 16 and above are removed
#    NOTE(review): presumably the non-substantive answer categories; confirm
#    against the BES codebook.
bes <- filter(
  bes,
  twitterUse == "Yes",
  Age > 17,
  profile_education_age != 7,
  !is.na(profile_gross_household),
  as.numeric(profile_gross_household) < 16
)


# Build a character `gender` column for uk from the rounded `woman` variable:
# rounded value 1 becomes "Male", rounded value 2 becomes "Female". Any other
# value is kept as its number converted to text, because the column becomes
# character once the first label is written.
# NOTE(review): this assumes `woman` is coded 1 = male / 2 = female rather than
# a 0/1 indicator; confirm against the uk data.
gender_code <- round(uk$woman)
is_code_1 <- gender_code == 1
is_code_2 <- gender_code == 2

uk$gender <- gender_code
uk$gender[is_code_1] <- "Male"
uk$gender[is_code_2] <- "Female"

# Unweighted gender shares in each data set, printed for comparison.
prop.table(table(uk$gender))

prop.table(table(bes$gender))




## education

# uk: round to whole category codes.
uk$profile_education_age <- round(uk$profile_education_age)

### bes: code 6 is merged into code 5. Per the original note, this assigns
### anyone still in school to the 20+ category.
bes$profile_education_age <- replace(
  bes$profile_education_age,
  which(bes$profile_education_age == 6),
  5
)


## age

# Whole-year ages for the uk sample. The tabulation now runs after the column
# is created; the original tabulated uk$Age one statement before assigning it.
uk$Age <- round(uk$age)
table(uk$Age)

## Quartile cut points of the BES age distribution, using BES weights.
## With wtd.quantile()'s default probs (0, .25, .5, .75, 1), elements 2, 3 and
## 4 are the 25th, 50th and 75th weighted percentiles.
qts <- wtd.quantile(bes$Age, weights = bes$wt_full_W6)
qts

## The SAME weighted cut points are applied to both data sets. The original
## cut uk at summary(bes$Age), i.e. the unweighted BES quartiles, while bes was
## cut at the weighted ones. uk$age_q and the age_q population targets were
## therefore defined on different intervals.
## NOTE(review): a missing Age stays in category 1 in both data sets (NA index
## positions are skipped by the assignments below); uk is not filtered on age
## in the visible code.
uk$age_q <- 1
uk$age_q[uk$Age >= qts[2] & uk$Age < qts[3]] <- 2
uk$age_q[uk$Age >= qts[3] & uk$Age < qts[4]] <- 3
uk$age_q[uk$Age >= qts[4]] <- 4

bes$age_q <- 1
bes$age_q[bes$Age >= qts[2] & bes$Age < qts[3]] <- 2
bes$age_q[bes$Age >= qts[3] & bes$Age < qts[4]] <- 3
bes$age_q[bes$Age >= qts[4]] <- 4




 
#### income

table(bes$profile_gross_household)

# Recode the BES income codes 1-15 to one pound amount per code. The amounts
# are taken verbatim from the original recode; position i is the value for
# code i. Codes outside 1-15 (or NA) are left as they are, exactly as the
# original chain of assignments did.
income_values <- c(
  2500, 7500, 12500, 17500, 22500,
  27500, 32500, 37500, 42500, 47500,
  55000, 65000, 85000, 125000, 200000
)
income_code <- as.numeric(bes$profile_gross_household)
has_value <- income_code %in% seq_along(income_values)

bes$profile_gross_household_num <- income_code
bes$profile_gross_household_num[has_value] <-
  income_values[income_code[has_value]]

summary(bes$profile_gross_household_num)

# NOTE(review): uk$profile_gross_household is compared directly with the pound
# amounts above, so it is assumed to be on the same pound scale; confirm.
summary(uk$profile_gross_household)

#### new var: income quartile

## Weighted quartile cut points of BES income. With wtd.quantile()'s default
## probs, elements 2, 3 and 4 are the 25th, 50th and 75th weighted percentiles.
qts <- wtd.quantile(bes$profile_gross_household_num, weights = bes$wt_full_W6)

## The same weighted cut points are applied to both data sets. The original cut
## uk at summary(bes$profile_gross_household_num), i.e. the unweighted BES
## quartiles, while bes was cut at the weighted ones. uk$inc_q and the inc_q
## population targets were therefore defined on different intervals.
uk$inc_q <- 1
uk$inc_q[uk$profile_gross_household >= qts[2] &
           uk$profile_gross_household < qts[3]] <- 2
uk$inc_q[uk$profile_gross_household >= qts[3] &
           uk$profile_gross_household < qts[4]] <- 3
uk$inc_q[uk$profile_gross_household >= qts[4]] <- 4

bes$inc_q <- 1
bes$inc_q[bes$profile_gross_household_num >= qts[2] &
            bes$profile_gross_household_num < qts[3]] <- 2
bes$inc_q[bes$profile_gross_household_num >= qts[3] &
            bes$profile_gross_household_num < qts[4]] <- 3
bes$inc_q[bes$profile_gross_household_num >= qts[4]] <- 4





################## rake

# Survey design over uk with no weights supplied, converted to a
# replicate-weights design so that rake() can be applied to it.
svy1 <- svydesign(ids = ~1, data = uk)
rclus1 <- as.svrepdesign(svy1)

# Gender distribution in uk before raking.
svymean(~gender, rclus1)

# BES gender counts: unweighted (printed) and weighted (kept in `x`).
table(bes$gender)
x <- wtd.table(bes$gender, weights = bes$wt_full_W6)

## Target margins: BES weighted counts per category, rounded to whole numbers.
## The second element of the wtd.table() result holds the weighted counts.
## NOTE(review): the category labels are hard-coded and paired by position with
## the wtd.table() output; confirm the output order matches (e.g. Male before
## Female).
pop.gender <- data.frame(
  gender = c("Male", "Female"),
  Freq = round(as.numeric(x[[2]]))
)
pop.educ <- data.frame(
  profile_education_age = c(1, 2, 3, 4, 5),
  Freq = round(as.numeric(
    wtd.table(bes$profile_education_age, weights = bes$wt_full_W6)[[2]]
  ))
)
pop.age <- data.frame(
  age_q = c(1, 2, 3, 4),
  Freq = round(as.numeric(
    wtd.table(bes$age_q, weights = bes$wt_full_W6)[[2]]
  ))
)
pop.inc <- data.frame(
  inc_q = c(1, 2, 3, 4),
  Freq = round(as.numeric(
    wtd.table(bes$inc_q, weights = bes$wt_full_W6)[[2]]
  ))
)

# Rake uk to the four BES margins: gender, education, age quartile and income
# quartile.
rclus1_4 <- rake(
  rclus1,
  sample.margins = list(~gender, ~profile_education_age, ~age_q, ~inc_q),
  population.margins = list(pop.gender, pop.educ, pop.age, pop.inc)
)

# Gender distribution after raking.
svymean(~gender, rclus1_4)

#################

# Attach the raked weights to uk and write the result to disk.
uk$raked_weights <- rclus1_4$pweights

save(uk, file = "combined_w_raked_weights.RData")

